Initiate this code¶

In [1]:
from collections import Counter
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import plotly as py
import numpy as np
from sklearn import preprocessing
import plotly.offline as pyo
import matplotlib.pyplot as plt
%matplotlib inline
import geopandas as gpd
import requests  # For retrieving data online
import json      # for working with json files
from shapely.geometry import Point, Polygon

def get_df_by_year(df: pd.DataFrame, year: int) -> pd.DataFrame:
    idx = df['year'] == year
    return df[idx]
In [2]:
df87 = pd.read_csv('data/1987_va.csv', sep=',')
df92 = pd.read_csv('data/1992_va.csv', sep=',')
df97 = pd.read_csv('data/1997_va.csv', sep=',')
df02 = pd.read_csv('data/2002_va.csv', sep=',')
df07 = pd.read_csv('data/2007_va.csv', sep=',')
df12 = pd.read_csv('data/2012_va.csv', sep=',')
df17 = pd.read_csv('data/2017_va.csv', sep=',')
df22 = pd.read_csv('data/2022_va.csv', sep=',')
all_tri = pd.read_csv('data/all_tri.csv', sep=',')
/tmp/ipykernel_119/31732373.py:9: DtypeWarning:

Columns (21,22,23,28,29,118) have mixed types. Specify dtype option on import or set low_memory=False.

In [3]:
df87['Combined Air'] = df87['48. 5.1 - FUGITIVE AIR'] + df87['49. 5.2 - STACK AIR']
df87['Non-Air'] = df87['104. TOTAL RELEASES'] - df87['Combined Air']
df87['Water'] = df87['50. 5.3 - WATER']
df87['Non-Water'] = df87['104. TOTAL RELEASES'] - df87['50. 5.3 - WATER']
df87['Other'] = df87['104. TOTAL RELEASES'] - (df87['50. 5.3 - WATER'] + df87['48. 5.1 - FUGITIVE AIR'] + df87['49. 5.2 - STACK AIR'])
In [28]:
df92['Combined Air'] = df92['48. 5.1 - FUGITIVE AIR'] + df92['49. 5.2 - STACK AIR']
df92['Non-Air'] = df92['104. TOTAL RELEASES'] - df92['Combined Air']
df92['Water'] = df92['50. 5.3 - WATER']
df92['Non-Water'] = df92['104. TOTAL RELEASES'] - df92['50. 5.3 - WATER']
df92['Other'] = df92['104. TOTAL RELEASES'] - (df92['50. 5.3 - WATER'] + df92['48. 5.1 - FUGITIVE AIR'] + df92['49. 5.2 - STACK AIR'])
In [5]:
df97['Combined Air'] = df97['48. 5.1 - FUGITIVE AIR'] + df97['49. 5.2 - STACK AIR']
df97['Non-Air'] = df97['104. TOTAL RELEASES'] - df97['Combined Air']
df97['Water'] = df97['50. 5.3 - WATER']
df97['Non-Water'] = df97['104. TOTAL RELEASES'] - df97['50. 5.3 - WATER']
df97['Other'] = df97['104. TOTAL RELEASES'] - (df97['50. 5.3 - WATER'] + df97['48. 5.1 - FUGITIVE AIR'] + df97['49. 5.2 - STACK AIR'])
In [6]:
df02['Combined Air'] = df02['48. 5.1 - FUGITIVE AIR'] + df02['49. 5.2 - STACK AIR']
df02['Non-Air'] = df02['104. TOTAL RELEASES'] - df02['Combined Air']
df02['Water'] = df02['50. 5.3 - WATER']
df02['Non-Water'] = df02['104. TOTAL RELEASES'] - df02['50. 5.3 - WATER']
df02['Other'] = df02['104. TOTAL RELEASES'] - (df02['50. 5.3 - WATER'] + df02['48. 5.1 - FUGITIVE AIR'] + df02['49. 5.2 - STACK AIR'])
In [7]:
df07['Combined Air'] = df07['48. 5.1 - FUGITIVE AIR'] + df07['49. 5.2 - STACK AIR']
df07['Non-Air'] = df07['104. TOTAL RELEASES'] - df07['Combined Air']
df07['Water'] = df07['50. 5.3 - WATER']
df07['Non-Water'] = df07['104. TOTAL RELEASES'] - df07['50. 5.3 - WATER']
df07['Other'] = df07['104. TOTAL RELEASES'] - (df07['50. 5.3 - WATER'] + df07['48. 5.1 - FUGITIVE AIR'] + df07['49. 5.2 - STACK AIR'])
In [8]:
df12['Combined Air'] = df12['48. 5.1 - FUGITIVE AIR'] + df12['49. 5.2 - STACK AIR']
df12['Non-Air'] = df12['104. TOTAL RELEASES'] - df12['Combined Air']
df12['Water'] = df12['50. 5.3 - WATER']
df12['Non-Water'] = df12['104. TOTAL RELEASES'] - df12['50. 5.3 - WATER']
df12['Other'] = df12['104. TOTAL RELEASES'] - (df12['50. 5.3 - WATER'] + df12['48. 5.1 - FUGITIVE AIR'] + df12['49. 5.2 - STACK AIR'])
In [9]:
df17['Combined Air'] = df17['48. 5.1 - FUGITIVE AIR'] + df17['49. 5.2 - STACK AIR']
df17['Non-Air'] = df17['104. TOTAL RELEASES'] - df17['Combined Air']
df17['Water'] = df17['50. 5.3 - WATER']
df17['Non-Water'] = df17['104. TOTAL RELEASES'] - df17['50. 5.3 - WATER']
df17['Other'] = df17['104. TOTAL RELEASES'] - (df17['50. 5.3 - WATER'] + df17['48. 5.1 - FUGITIVE AIR'] + df17['49. 5.2 - STACK AIR'])
In [10]:
df22['Combined Air'] = df22['48. 5.1 - FUGITIVE AIR'] + df22['49. 5.2 - STACK AIR']
df22['Non-Air'] = df22['104. TOTAL RELEASES'] - df22['Combined Air']
df22['Water'] = df22['50. 5.3 - WATER']
df22['Non-Water'] = df22['104. TOTAL RELEASES'] - df22['50. 5.3 - WATER']
df22['Other'] = df22['104. TOTAL RELEASES'] - (df22['50. 5.3 - WATER'] + df22['48. 5.1 - FUGITIVE AIR'] + df22['49. 5.2 - STACK AIR'])

Carcinogen Graph¶

In [107]:
# ACTUALLY use this
y = '43. CARCINOGEN'
Xname = 'Carcinogen Producing Facility?'
Yname = 'Number of Facilities'
fig = px.histogram(all_tri, x="1. YEAR", color="43. CARCINOGEN", barmode="group", text_auto=True)
fig.update_layout(yaxis_title = Yname, xaxis_title = Xname)
fig.update_layout(autosize=False, width=1600, height=900)
fig.update_layout(title_text='Virginia TRI Site Carcinogen-Releasing Status', title_y=0.98, title_x=0.75)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=100))
fig.update_layout(legend_title="Year", title_x=0.5)
fig.update_layout(yaxis = dict(tickfont = dict(size=16)))
fig.update_layout(xaxis = dict(tickfont = dict(size=16)))
fig.update_layout(
    title_font=dict(size=24),
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20)  
)
# fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.show()

# increase label size

1987 Top Sectors by Releases Compared to 2022 Same Sectors (USE)¶

In [ ]:
# URGENT: Instead of total releases, look at air vs non-air
# add the two air columns together, make stacked grouped histogram
In [94]:
# sum_releases87 = df87.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
sum_releases87 = df87.groupby('20. INDUSTRY SECTOR').agg({'104. TOTAL RELEASES': 'sum', 
                                                          'Combined Air': 'sum',
                                                          '50. 5.3 - WATER': 'sum',
                                                         }).reset_index()
releases87 = sum_releases87.sort_values("104. TOTAL RELEASES", ascending=False)
top10_releases87 = sum_releases87.sort_values("104. TOTAL RELEASES", ascending=False).head(10)
top10_releases87['Year'] = '1987'
top10_releases87 = top10_releases87.rename(columns={'20. INDUSTRY SECTOR': 'Sector', '104. TOTAL RELEASES': 'Total Releases', '50. 5.3 - WATER' : 'Water'})
top10_releases87
Out[94]:
Sector Total Releases Combined Air Water Year
3 Chemicals 209006742.0 75234885.0 123215261.0 1987
14 Paper 123572811.0 18752489.0 99941364.0 1987
8 Furniture 9593993.0 9321768.0 0.0 1987
19 Printing 7627668.0 7624318.0 250.0 1987
17 Plastics and Rubber 6725559.0 6333701.0 503.0 1987
21 Textiles 6458216.0 3994270.0 2037283.0 1987
18 Primary Metals 5192312.0 2680080.0 32996.0 1987
23 Transportation Equipment 5010296.0 2911236.0 97136.0 1987
4 Computers and Electronic Products 4967561.0 4454919.0 190.0 1987
13 Other 4796247.0 4793747.0 2500.0 1987

2022 Sectors based on 1987 (USE)¶

In [95]:
# sum_releases22 = df22.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
sum_releases22 = df22.groupby('20. INDUSTRY SECTOR').agg({'104. TOTAL RELEASES': 'sum', 
                                                          'Combined Air': 'sum',
                                                          '50. 5.3 - WATER': 'sum',
                                                         }).reset_index()
top10_releases22 = sum_releases22.sort_values("104. TOTAL RELEASES", ascending=False)
top10_releases22['Year'] = '2022'
top10_releases22.head(10)
Out[95]:
20. INDUSTRY SECTOR 104. TOTAL RELEASES Combined Air 50. 5.3 - WATER Year
13 Other 8.624468e+06 9.875721e+04 8.131779e+06 2022
14 Paper 6.534435e+06 5.046022e+06 2.266375e+05 2022
2 Chemicals 4.783454e+06 2.710608e+06 3.684343e+05 2022
4 Electric Utilities 1.790412e+06 1.263527e+06 3.006500e+03 2022
7 Food 1.775758e+06 5.971080e+05 7.945833e+05 2022
6 Fabricated Metals 1.608258e+06 7.180697e+05 5.878526e+05 2022
18 Primary Metals 1.338907e+06 1.806303e+05 2.049494e+03 2022
15 Petroleum 1.318013e+06 1.317967e+06 0.000000e+00 2022
17 Plastics and Rubber 1.117182e+06 9.560208e+05 9.400000e+01 2022
23 Transportation Equipment 4.292275e+05 2.065159e+05 4.240810e+03 2022
In [96]:
match_releases22 = []
i=0
for i in range(10):
    sector_name = top10_releases87["Sector"].iloc[i]
    sect22 = top10_releases22.loc[top10_releases22['20. INDUSTRY SECTOR'] == sector_name]
    match_releases22.append([sector_name, 
                             sect22['104. TOTAL RELEASES'].loc[sect22.index[0]], 
                             sect22['Combined Air'].loc[sect22.index[0]],
                             sect22['50. 5.3 - WATER'].loc[sect22.index[0]],
                             '2022'
                            ])
match22_releasesdf = pd.DataFrame(match_releases22, columns=['Sector', 'Total Releases', 'Combined Air', 'Water', 'Year'])
match22_releasesdf
Out[96]:
Sector Total Releases Combined Air Water Year
0 Chemicals 4.783454e+06 2.710608e+06 3.684343e+05 2022
1 Paper 6.534435e+06 5.046022e+06 2.266375e+05 2022
2 Furniture 1.516438e+05 1.516438e+05 0.000000e+00 2022
3 Printing 2.151657e+04 1.658500e+04 0.000000e+00 2022
4 Plastics and Rubber 1.117182e+06 9.560208e+05 9.400000e+01 2022
5 Textiles 1.375435e+04 2.042522e+02 0.000000e+00 2022
6 Primary Metals 1.338907e+06 1.806303e+05 2.049494e+03 2022
7 Transportation Equipment 4.292275e+05 2.065159e+05 4.240810e+03 2022
8 Computers and Electronic Products 6.538995e+04 5.975298e+04 8.600000e+01 2022
9 Other 8.624468e+06 9.875721e+04 8.131779e+06 2022
In [97]:
frames = [top10_releases87, match22_releasesdf]
releases_df = pd.concat(frames)
releases_df
Out[97]:
Sector Total Releases Combined Air Water Year
3 Chemicals 2.090067e+08 7.523488e+07 1.232153e+08 1987
14 Paper 1.235728e+08 1.875249e+07 9.994136e+07 1987
8 Furniture 9.593993e+06 9.321768e+06 0.000000e+00 1987
19 Printing 7.627668e+06 7.624318e+06 2.500000e+02 1987
17 Plastics and Rubber 6.725559e+06 6.333701e+06 5.030000e+02 1987
21 Textiles 6.458216e+06 3.994270e+06 2.037283e+06 1987
18 Primary Metals 5.192312e+06 2.680080e+06 3.299600e+04 1987
23 Transportation Equipment 5.010296e+06 2.911236e+06 9.713600e+04 1987
4 Computers and Electronic Products 4.967561e+06 4.454919e+06 1.900000e+02 1987
13 Other 4.796247e+06 4.793747e+06 2.500000e+03 1987
0 Chemicals 4.783454e+06 2.710608e+06 3.684343e+05 2022
1 Paper 6.534435e+06 5.046022e+06 2.266375e+05 2022
2 Furniture 1.516438e+05 1.516438e+05 0.000000e+00 2022
3 Printing 2.151657e+04 1.658500e+04 0.000000e+00 2022
4 Plastics and Rubber 1.117182e+06 9.560208e+05 9.400000e+01 2022
5 Textiles 1.375435e+04 2.042522e+02 0.000000e+00 2022
6 Primary Metals 1.338907e+06 1.806303e+05 2.049494e+03 2022
7 Transportation Equipment 4.292275e+05 2.065159e+05 4.240810e+03 2022
8 Computers and Electronic Products 6.538995e+04 5.975298e+04 8.600000e+01 2022
9 Other 8.624468e+06 9.875721e+04 8.131779e+06 2022
In [50]:
# releases_df['Logarithmized Data'] = np.log10(releases_df['104. TOTAL RELEASES'])
# releases_df
In [89]:
releases_df.sort_values(['Year'],ascending=[True],inplace=True)
fig = px.histogram(releases_df, y="Sector", x="Total Releases", color="Year", barmode="group", text_auto=True)
fig.update_layout(yaxis_title = "Sectors", xaxis_title = "Sum of Releases")
# fig.update_layout(autosize=False, width=1000, height=700)
# fig.update_layout(autosize=False, width=1600, height=900)
fig.update_layout(autosize=True)
fig.update_layout(title_text='Virginia TRI Top 10 Sectors by Total Releases in 1987, Compared to 2022', 
                  title_y=0.98, title_x=0.85)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=50))
fig.update_layout(legend_title="Year", title_x=0.5)
fig.update_layout(height=950)
fig.update_layout(
    title_font=dict(size=20),
    xaxis_title_font=dict(size=18),
    yaxis_title_font=dict(size=18),
    yaxis = dict(tickfont = dict(size=16)),
    xaxis = dict(tickfont = dict(size=16))
)
traceorder = "normal"
fig.show()
In [99]:
df_melted = pd.melt(releases_df, id_vars=['Sector', 'Year'], var_name='Type of Release', value_name='Value')
df_melted
Out[99]:
Sector Year Type of Release Value
0 Chemicals 1987 Total Releases 2.090067e+08
1 Paper 1987 Total Releases 1.235728e+08
2 Furniture 1987 Total Releases 9.593993e+06
3 Printing 1987 Total Releases 7.627668e+06
4 Plastics and Rubber 1987 Total Releases 6.725559e+06
5 Textiles 1987 Total Releases 6.458216e+06
6 Primary Metals 1987 Total Releases 5.192312e+06
7 Transportation Equipment 1987 Total Releases 5.010296e+06
8 Computers and Electronic Products 1987 Total Releases 4.967561e+06
9 Other 1987 Total Releases 4.796247e+06
10 Chemicals 2022 Total Releases 4.783454e+06
11 Paper 2022 Total Releases 6.534435e+06
12 Furniture 2022 Total Releases 1.516438e+05
13 Printing 2022 Total Releases 2.151657e+04
14 Plastics and Rubber 2022 Total Releases 1.117182e+06
15 Textiles 2022 Total Releases 1.375435e+04
16 Primary Metals 2022 Total Releases 1.338907e+06
17 Transportation Equipment 2022 Total Releases 4.292275e+05
18 Computers and Electronic Products 2022 Total Releases 6.538995e+04
19 Other 2022 Total Releases 8.624468e+06
20 Chemicals 1987 Combined Air 7.523488e+07
21 Paper 1987 Combined Air 1.875249e+07
22 Furniture 1987 Combined Air 9.321768e+06
23 Printing 1987 Combined Air 7.624318e+06
24 Plastics and Rubber 1987 Combined Air 6.333701e+06
25 Textiles 1987 Combined Air 3.994270e+06
26 Primary Metals 1987 Combined Air 2.680080e+06
27 Transportation Equipment 1987 Combined Air 2.911236e+06
28 Computers and Electronic Products 1987 Combined Air 4.454919e+06
29 Other 1987 Combined Air 4.793747e+06
30 Chemicals 2022 Combined Air 2.710608e+06
31 Paper 2022 Combined Air 5.046022e+06
32 Furniture 2022 Combined Air 1.516438e+05
33 Printing 2022 Combined Air 1.658500e+04
34 Plastics and Rubber 2022 Combined Air 9.560208e+05
35 Textiles 2022 Combined Air 2.042522e+02
36 Primary Metals 2022 Combined Air 1.806303e+05
37 Transportation Equipment 2022 Combined Air 2.065159e+05
38 Computers and Electronic Products 2022 Combined Air 5.975298e+04
39 Other 2022 Combined Air 9.875721e+04
40 Chemicals 1987 Water 1.232153e+08
41 Paper 1987 Water 9.994136e+07
42 Furniture 1987 Water 0.000000e+00
43 Printing 1987 Water 2.500000e+02
44 Plastics and Rubber 1987 Water 5.030000e+02
45 Textiles 1987 Water 2.037283e+06
46 Primary Metals 1987 Water 3.299600e+04
47 Transportation Equipment 1987 Water 9.713600e+04
48 Computers and Electronic Products 1987 Water 1.900000e+02
49 Other 1987 Water 2.500000e+03
50 Chemicals 2022 Water 3.684343e+05
51 Paper 2022 Water 2.266375e+05
52 Furniture 2022 Water 0.000000e+00
53 Printing 2022 Water 0.000000e+00
54 Plastics and Rubber 2022 Water 9.400000e+01
55 Textiles 2022 Water 0.000000e+00
56 Primary Metals 2022 Water 2.049494e+03
57 Transportation Equipment 2022 Water 4.240810e+03
58 Computers and Electronic Products 2022 Water 8.600000e+01
59 Other 2022 Water 8.131779e+06
In [106]:
fig = px.bar(df_melted, 
             x='Value', 
             y='Sector', 
             color='Type of Release', 
             barmode='group', 
             orientation='h', 
             facet_row='Year', 
             height=2000,
             title='Virginia TRI Top 10 Sectors by Total Releases in 1987, Compared to 2022')
fig.update_layout(xaxis_title = "Sum of Releases")
fig.update_layout(title_y=0.97, title_x=0.5)
fig.update_layout(
    title_font=dict(size=20)
)
fig.show()
In [15]:
# With logarithmized data
df_melted['Value'] = np.log(df_melted['Value'])
fig = px.bar(df_melted, 
             x='Value', 
             y='20. INDUSTRY SECTOR', 
             color='Variable', 
             barmode='group', 
             orientation='h', 
             facet_row='Year', 
             height=1000,
             title='Virginia TRI Top 10 Sectors by Total Releases in 1987, Compared to 2022')

fig.show()
/opt/conda/lib/python3.11/site-packages/pandas/core/arraylike.py:399: RuntimeWarning:

divide by zero encountered in log

Geovisualization¶

In [ ]:
color_code = []
for i in range(len(df87)):
    release = df87['104. TOTAL RELEASES'].iloc[i]
    if release < 20000: #dummy numbers
        color_code += ['green'] #fee0d2
    elif 20000 < release < 80000:
        color_code += ['red'] #fc9272
    else:
        color_code += ['blue'] #de2d26
color_code
df87_color = pd.DataFrame({'Color':color_code})
# df87_color
df87['Color'] = df87_color['Color'].values
# df87
# change the if else color_code to buckets with the range values
In [29]:
color_code3 = []
for i in range(len(df92)):
    release = df92['104. TOTAL RELEASES'].iloc[i]
    if release < 20000: #dummy numbers
        color_code3 += ['green'] #fee0d2
    elif 20000 < release < 80000:
        color_code3 += ['red'] #fc9272
    else:
        color_code3 += ['blue'] #de2d26
color_code
df92_color = pd.DataFrame({'Color':color_code3})
# df87_color
df92['Color'] = df92_color['Color'].values
In [30]:
color_code4 = []
for i in range(len(df97)):
    release = df97['104. TOTAL RELEASES'].iloc[i]
    if release < 20000: #dummy numbers
        color_code4 += ['green'] #fee0d2
    elif 20000 < release < 80000:
        color_code4 += ['red'] #fc9272
    else:
        color_code4 += ['blue'] #de2d26
color_code4
df97_color = pd.DataFrame({'Color':color_code4})
# df87_color
df97['Color'] = df97_color['Color'].values
In [31]:
color_code5 = []
for i in range(len(df02)):
    release = df02['104. TOTAL RELEASES'].iloc[i]
    if release < 20000: #dummy numbers
        color_code5 += ['green'] #fee0d2
    elif 20000 < release < 80000:
        color_code5 += ['red'] #fc9272
    else:
        color_code5 += ['blue'] #de2d26
color_code5
df02_color = pd.DataFrame({'Color':color_code5})
# df87_color
df02['Color'] = df02_color['Color'].values
In [33]:
color_code6 = []
for i in range(len(df07)):
    release = df07['104. TOTAL RELEASES'].iloc[i]
    if release < 20000: #dummy numbers
        color_code6 += ['green'] #fee0d2
    elif 20000 < release < 80000:
        color_code6 += ['red'] #fc9272
    else:
        color_code6 += ['blue'] #de2d26
color_code6
df07_color = pd.DataFrame({'Color':color_code6})
# df87_color
df07['Color'] = df07_color['Color'].values
In [34]:
color_code7 = []
for i in range(len(df12)):
    release = df12['104. TOTAL RELEASES'].iloc[i]
    if release < 20000: #dummy numbers
        color_code7 += ['green'] #fee0d2
    elif 20000 < release < 80000:
        color_code7 += ['red'] #fc9272
    else:
        color_code7 += ['blue'] #de2d26
color_code7
df12_color = pd.DataFrame({'Color':color_code7})
# df87_color
df12['Color'] = df12_color['Color'].values
In [35]:
color_code8 = []
for i in range(len(df17)):
    release = df17['104. TOTAL RELEASES'].iloc[i]
    if release < 20000: #dummy numbers
        color_code8 += ['green'] #fee0d2
    elif 20000 < release < 80000:
        color_code8 += ['red'] #fc9272
    else:
        color_code8 += ['blue'] #de2d26
color_code8
df17_color = pd.DataFrame({'Color':color_code8})
# df87_color
df17['Color'] = df17_color['Color'].values
In [ ]:
color_code2 = []
for i in range(len(df22)):
    release = df22['104. TOTAL RELEASES'].iloc[i]
    if release < 20000: #dummy numbers
        color_code2 += ['green'] #fee0d2
    elif 20000 < release < 80000:
        color_code2 += ['red'] #fc9272
    else:
        color_code2 += ['blue'] #de2d26
color_code2
df22_color = pd.DataFrame({'Color':color_code2})
df22['Color'] = df22_color['Color'].values
# df22
In [14]:
# https://github.com/glynnbird/usstatesgeojson/blob/master/virginia.geojson
url = 'data/virginia.geojson'
va = gpd.read_file(url)
va
ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/share/proj failed
Out[14]:
name abbreviation capital city population area waterarea landarea houseseats statehood group geometry
0 Virginia VA Richmond Virginia Beach 8260405 110787 8508 102279 11 1788-06-25 US States MULTIPOLYGON (((-75.24227 38.02721, -75.29687 ...
In [42]:
geometry87=[Point(xy) for xy in zip(df87["13. LONGITUDE"], df87["12. LATITUDE"])]
geodata87=gpd.GeoDataFrame(df87,geometry=geometry87)

geometry92=[Point(xy) for xy in zip(df92["13. LONGITUDE"], df92["12. LATITUDE"])]
geodata92=gpd.GeoDataFrame(df92,geometry=geometry92)

geometry97=[Point(xy) for xy in zip(df97["13. LONGITUDE"], df97["12. LATITUDE"])]
geodata97=gpd.GeoDataFrame(df97,geometry=geometry97)

geometry02=[Point(xy) for xy in zip(df02["13. LONGITUDE"], df02["12. LATITUDE"])]
geodata02=gpd.GeoDataFrame(df02,geometry=geometry02)

geometry07=[Point(xy) for xy in zip(df07["13. LONGITUDE"], df07["12. LATITUDE"])]
geodata07=gpd.GeoDataFrame(df07,geometry=geometry07)

geometry12=[Point(xy) for xy in zip(df12["13. LONGITUDE"], df12["12. LATITUDE"])]
geodata12=gpd.GeoDataFrame(df12,geometry=geometry12)

geometry12=[Point(xy) for xy in zip(df12["13. LONGITUDE"], df12["12. LATITUDE"])]
geodata12=gpd.GeoDataFrame(df12,geometry=geometry12)

geometry17=[Point(xy) for xy in zip(df17["13. LONGITUDE"], df17["12. LATITUDE"])]
geodata17=gpd.GeoDataFrame(df17,geometry=geometry17)

geometry22=[Point(xy) for xy in zip(df22["13. LONGITUDE"], df22["12. LATITUDE"])]
geodata22=gpd.GeoDataFrame(df22,geometry=geometry22)
In [22]:
fig, ax = plt.subplots(figsize=[10,7])
# va.plot(ax=ax, color='white', edgecolor='black', figsize=(10,6))
va.plot(ax=ax, color='white', edgecolor='black')
geodata87.plot(ax=ax, marker='o', color=df87['Color'], markersize=df87['104. TOTAL RELEASES']/30000)
plt.title("TRI Sites in 1987")
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()
# add color column to dataframe, assign total release ranges to different colors
No description has been provided for this image
In [46]:
fig, ax = plt.subplots(figsize=[10,7])
# va.plot(ax=ax, color='white', edgecolor='black', figsize=(10,6))
va.plot(ax=ax, color='white', edgecolor='black')
geodata92.plot(ax=ax, marker='o', color=df92['Color'], markersize=df92['104. TOTAL RELEASES']/30000)
plt.title("TRI Sites in 1992")
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()
No description has been provided for this image
In [47]:
fig, ax = plt.subplots(figsize=[10,7])
# va.plot(ax=ax, color='white', edgecolor='black', figsize=(10,6))
va.plot(ax=ax, color='white', edgecolor='black')
geodata97.plot(ax=ax, marker='o', color=df97['Color'], markersize=df97['104. TOTAL RELEASES']/30000)
plt.title("TRI Sites in 1997")
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()
No description has been provided for this image
In [48]:
fig, ax = plt.subplots(figsize=[10,7])
# va.plot(ax=ax, color='white', edgecolor='black', figsize=(10,6))
va.plot(ax=ax, color='white', edgecolor='black')
geodata02.plot(ax=ax, marker='o', color=df02['Color'], markersize=df02['104. TOTAL RELEASES']/30000)
plt.title("TRI Sites in 2002")
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()
No description has been provided for this image
In [49]:
fig, ax = plt.subplots(figsize=[10,7])
# va.plot(ax=ax, color='white', edgecolor='black', figsize=(10,6))
va.plot(ax=ax, color='white', edgecolor='black')
geodata07.plot(ax=ax, marker='o', color=df07['Color'], markersize=df07['104. TOTAL RELEASES']/30000)
plt.title("TRI Sites in 2007")
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()
No description has been provided for this image
In [50]:
fig, ax = plt.subplots(figsize=[10,7])
# va.plot(ax=ax, color='white', edgecolor='black', figsize=(10,6))
va.plot(ax=ax, color='white', edgecolor='black')
geodata12.plot(ax=ax, marker='o', color=df12['Color'], markersize=df12['104. TOTAL RELEASES']/30000)
plt.title("TRI Sites in 2012")
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()
No description has been provided for this image
In [58]:
fig, ax = plt.subplots(figsize=[10,7])
# va.plot(ax=ax, color='white', edgecolor='black', figsize=(10,6))
va.plot(ax=ax, color='white', edgecolor='black')
geodata17.plot(ax=ax, marker='o', color=df17['Color'], markersize=df17['104. TOTAL RELEASES']/30000)
plt.title("TRI Sites in 2017")
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()
No description has been provided for this image
In [27]:
fig, ax = plt.subplots(figsize=[10,7]) #inches
va.plot(ax=ax, color='white', edgecolor='black')
geodata22.plot(ax=ax, marker='o', color=df22['Color'], markersize=df22['104. TOTAL RELEASES']/30000)
plt.title("TRI Sites in 2022")
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()
No description has been provided for this image
In [16]:
# look at more explanatory data/graphs for time series
# time series line plot


# writeup: look at why some categories in 2022 have gone down
# look at data for one or two sectors?
# time series for other years
In [88]:
fig = px.choropleth(data_frame=geodata87,
                    locations='geodata87',
                    color='lifeExp',
                    hover_data={'iso_alpha': False},
                    range_color=[df['lifeExp'].min(), df['lifeExp'].max()],
                    labels = {'year': 'Year', 'lifeExp': 'Life Exp.'}
                   )
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[88], line 6
      1 fig = px.choropleth(data_frame=geodata87,
      2                     locations='iso_alpha',
      3                     color='lifeExp',
      4                     animation_frame='year',
      5                     hover_data={'iso_alpha': False},
----> 6                     range_color=[df['lifeExp'].min(), df['lifeExp'].max()],
      7                     labels = {'year': 'Year', 'lifeExp': 'Life Exp.'}
      8                    )

NameError: name 'df' is not defined

Individual Line Graph for Just Paper and Food Sectors¶

  • Emissions on Y axis
  • Year on X axis
  • Graph should be a line graph, lines should be the sectors
  • Focus on Industry Sector Paper and Food filters, and add their total releases over the years
In [23]:
filter87 = df87[['20. INDUSTRY SECTOR', '104. TOTAL RELEASES']]
filter87
Out[23]:
20. INDUSTRY SECTOR 104. TOTAL RELEASES
0 Wood Products 750.0
1 Furniture 15350.0
2 Chemicals 500.0
3 Furniture 35913.0
4 Plastics and Rubber 48456.0
... ... ...
1535 Furniture 99593.0
1536 Primary Metals 108000.0
1537 Primary Metals 250.0
1538 Paper 530200.0
1539 Chemicals 0.0

1540 rows × 2 columns

Food¶

1987¶

In [24]:
food87 = filter87.loc[filter87['20. INDUSTRY SECTOR'] == 'Food']
food87
Out[24]:
20. INDUSTRY SECTOR 104. TOTAL RELEASES
38 Food 0.0
41 Food 19913.0
65 Food 0.0
69 Food 0.0
79 Food 7080.0
110 Food 0.0
116 Food 0.0
118 Food 6720.0
169 Food 2500.0
227 Food 0.0
228 Food 0.0
292 Food 0.0
312 Food 0.0
317 Food 12000.0
329 Food 0.0
343 Food 0.0
507 Food 2990.0
596 Food 30690.0
623 Food 0.0
645 Food 0.0
666 Food 0.0
680 Food 0.0
687 Food 0.0
753 Food 0.0
783 Food 0.0
804 Food 0.0
834 Food 0.0
892 Food 242000.0
900 Food 0.0
902 Food 0.0
949 Food 5254.0
973 Food 0.0
989 Food 40000.0
1064 Food 0.0
1120 Food 1620.0
1128 Food 0.0
1138 Food 0.0
1156 Food 40000.0
1161 Food 11158.0
1164 Food 0.0
1181 Food 31700.0
1254 Food 250.0
1295 Food 0.0
1299 Food 8398.0
1301 Food 0.0
1333 Food 0.0
1336 Food 0.0
1341 Food 0.0
1359 Food 11500.0
1360 Food 0.0
1431 Food 7800.0
1434 Food 0.0
1465 Food 0.0
1473 Food 0.0
1512 Food 0.0
1514 Food 1600.0
1533 Food 2300.0
In [25]:
sum_food87 = food87['104. TOTAL RELEASES'].to_numpy().sum()
sum_food87

# *** do the same process for other years and append them into their own dataframe

# data = [['Sector', 'Food'], ['Sum of Releases', sum_food87]]

# Create the pandas DataFrame
# df = pd.DataFrame(data, columns=['Sector', 'Sum of Releases'])
# print(df)
Out[25]:
485473.0

1992¶

In [61]:
filter92 = df92[['20. INDUSTRY SECTOR', '104. TOTAL RELEASES']]
food92 = filter92.loc[filter92['20. INDUSTRY SECTOR'] == 'Food']
sum_food92 = food92['104. TOTAL RELEASES'].to_numpy().sum()
sum_food92
Out[61]:
410716.0

1997¶

In [60]:
filter97 = df97[['20. INDUSTRY SECTOR', '104. TOTAL RELEASES']]
food97 = filter97.loc[filter97['20. INDUSTRY SECTOR'] == 'Food']
sum_food97 = food97['104. TOTAL RELEASES'].to_numpy().sum()
sum_food97
Out[60]:
4655781.0

2002¶

In [62]:
filter02 = df02[['20. INDUSTRY SECTOR', '104. TOTAL RELEASES']]
food02 = filter02.loc[filter02['20. INDUSTRY SECTOR'] == 'Food']
sum_food02 = food02['104. TOTAL RELEASES'].to_numpy().sum()
sum_food02
Out[62]:
3452165.58

2007¶

In [63]:
filter07 = df07[['20. INDUSTRY SECTOR', '104. TOTAL RELEASES']]
food07 = filter07.loc[filter02['20. INDUSTRY SECTOR'] == 'Food']
sum_food07 = food07['104. TOTAL RELEASES'].to_numpy().sum()
sum_food07
Out[63]:
928578.8992999999

2012¶

In [64]:
filter12 = df12[['20. INDUSTRY SECTOR', '104. TOTAL RELEASES']]
food12 = filter12.loc[filter12['20. INDUSTRY SECTOR'] == 'Food']
sum_food12 = food12['104. TOTAL RELEASES'].to_numpy().sum()
sum_food12
Out[64]:
3370903.5700000003

2017¶

In [65]:
filter17 = df17[['20. INDUSTRY SECTOR', '104. TOTAL RELEASES']]
food17 = filter17.loc[filter17['20. INDUSTRY SECTOR'] == 'Food']
sum_food17 = food17['104. TOTAL RELEASES'].to_numpy().sum()
sum_food17
Out[65]:
2944558.0700000003

2022¶

In [56]:
filter22 = df22[['20. INDUSTRY SECTOR', '104. TOTAL RELEASES']]
food22 = filter22.loc[filter22['20. INDUSTRY SECTOR'] == 'Food']
sum_food22 = food22['104. TOTAL RELEASES'].to_numpy().sum()
sum_food22
Out[56]:
1775758.3460000004

Summing all food sums together¶

In [66]:
sum_food_df = pd.DataFrame()
sum_food_df['Sum of Releases'] = [sum_food87, 
                                  sum_food92,
                                  sum_food97,
                                  sum_food02,
                                  sum_food07,
                                  sum_food12,
                                  sum_food17,
                                  sum_food22]
sum_food_df.insert(1, "Year", [1987, 1992, 1997, 2002, 
                               2007, 2012, 2017, 2022], True)
sum_food_df["Sector"] = 'Food'
sum_food_df
Out[66]:
Sum of Releases Year Sector
0 4.854730e+05 1987 Food
1 4.107160e+05 1992 Food
2 4.655781e+06 1997 Food
3 3.452166e+06 2002 Food
4 9.285789e+05 2007 Food
5 3.370904e+06 2012 Food
6 2.944558e+06 2017 Food
7 1.775758e+06 2022 Food

Paper¶

1987¶

In [28]:
paper87 = filter87.loc[filter87['20. INDUSTRY SECTOR'] == 'Paper']
sum_paper87 = paper87['104. TOTAL RELEASES'].to_numpy().sum()
sum_paper87
Out[28]:
123572811.0

1992¶

In [67]:
paper92 = filter92.loc[filter92['20. INDUSTRY SECTOR'] == 'Paper']
sum_paper92 = paper92['104. TOTAL RELEASES'].to_numpy().sum()
sum_paper92
Out[67]:
13046944.0

1997¶

In [68]:
paper97 = filter97.loc[filter97['20. INDUSTRY SECTOR'] == 'Paper']
sum_paper97 = paper97['104. TOTAL RELEASES'].to_numpy().sum()
sum_paper97
Out[68]:
18203479.0

2002¶

In [69]:
paper02 = filter02.loc[filter02['20. INDUSTRY SECTOR'] == 'Paper']
sum_paper02 = paper02['104. TOTAL RELEASES'].to_numpy().sum()
sum_paper02
Out[69]:
14963535.346499998

2007¶

In [71]:
paper07 = filter07.loc[filter07['20. INDUSTRY SECTOR'] == 'Paper']
sum_paper07 = paper07['104. TOTAL RELEASES'].to_numpy().sum()
sum_paper07
Out[71]:
11364427.617899999

2012¶

In [72]:
paper12 = filter12.loc[filter12['20. INDUSTRY SECTOR'] == 'Paper']
sum_paper12 = paper12['104. TOTAL RELEASES'].to_numpy().sum()
sum_paper12
Out[72]:
6497866.236127

2017¶

In [73]:
paper17 = filter17.loc[filter17['20. INDUSTRY SECTOR'] == 'Paper']
sum_paper17 = paper17['104. TOTAL RELEASES'].to_numpy().sum()
sum_paper17
Out[73]:
7014684.635014

2022¶

In [74]:
paper22 = filter22.loc[filter22['20. INDUSTRY SECTOR'] == 'Paper']
sum_paper22 = paper22['104. TOTAL RELEASES'].to_numpy().sum()
sum_paper22
Out[74]:
6534434.722464999

Adding sum of all paper dataframes¶

In [75]:
sum_paper_df = pd.DataFrame()
sum_paper_df['Sum of Releases'] = [sum_paper87, 
                                   sum_paper92,
                                   sum_paper97,
                                   sum_paper02,
                                   sum_paper07,
                                   sum_paper12,
                                   sum_paper17,
                                   sum_paper22]
sum_paper_df.insert(1, "Year", [1987, 1992, 1997, 2002, 
                               2007, 2012, 2017, 2022], True)
sum_paper_df["Sector"] = 'Paper'
sum_paper_df
Out[75]:
Sum of Releases Year Sector
0 1.235728e+08 1987 Paper
1 1.304694e+07 1992 Paper
2 1.820348e+07 1997 Paper
3 1.496354e+07 2002 Paper
4 1.136443e+07 2007 Paper
5 6.497866e+06 2012 Paper
6 7.014685e+06 2017 Paper
7 6.534435e+06 2022 Paper
In [31]:
sum_paper87
Out[31]:
123572811.0
In [32]:
sum_paper22
Out[32]:
6534434.722464999
In [76]:
frames = [sum_food_df, sum_paper_df]
sum_all_df = pd.concat(frames)
sum_all_df['Logarithmized Data'] = np.log10(sum_all_df['Sum of Releases'])
sum_all_df
Out[76]:
Sum of Releases Year Sector Logarithmized Data
0 4.854730e+05 1987 Food 5.686165
1 4.107160e+05 1992 Food 5.613542
2 4.655781e+06 1997 Food 6.667993
3 3.452166e+06 2002 Food 6.538092
4 9.285789e+05 2007 Food 5.967819
5 3.370904e+06 2012 Food 6.527746
6 2.944558e+06 2017 Food 6.469020
7 1.775758e+06 2022 Food 6.249384
0 1.235728e+08 1987 Paper 8.091923
1 1.304694e+07 1992 Paper 7.115509
2 1.820348e+07 1997 Paper 7.260154
3 1.496354e+07 2002 Paper 7.175034
4 1.136443e+07 2007 Paper 7.055548
5 6.497866e+06 2012 Paper 6.812771
6 7.014685e+06 2017 Paper 6.846008
7 6.534435e+06 2022 Paper 6.815208
In [77]:
fig = px.line(sum_all_df, y="Logarithmized Data", x="Year", 
              color='Sector',
              title='Sum of Releases Over Year by Sector', markers=True)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=50))
fig.update_layout(legend_title="Year", title_x=0.5)
fig.update_layout(autosize=False, width=900, height=600)
fig.update_layout(title_y=0.98, title_x=0.85)
fig.show()
In [78]:
fig = px.line(sum_all_df, y="Sum of Releases", x="Year", 
              color='Sector',
              title='Sum of Releases Over Year by Sector', markers=True)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=50))
fig.update_layout(legend_title="Year", title_x=0.5)
fig.update_layout(autosize=False, width=900, height=600)
fig.update_layout(title_y=0.98, title_x=0.85)
fig.show()

Individual Line Graph for Other Sector (USE)¶

1987¶

In [15]:
top10_releases87
other87 = top10_releases87.loc[top10_releases87['20. INDUSTRY SECTOR'] == 'Other']
other87
Out[15]:
20. INDUSTRY SECTOR Sum Year
13 Other 4796247.0 1987
In [16]:
sum_other87 = other87['Sum'].to_numpy().sum()
sum_other87
Out[16]:
4796247.0
In [21]:
sum_releases92 = df92.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
top10_releases92 = sum_releases92.sort_values("Sum", ascending=False)
top10_releases92['Year'] = '1992'
top10_releases92 = top10_releases92.loc[top10_releases92['20. INDUSTRY SECTOR'] == 'Other']
top10_releases92
Out[21]:
20. INDUSTRY SECTOR Sum Year
12 Other 268155.0 1992
In [22]:
sum_releases97 = df97.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
top10_releases97 = sum_releases97.sort_values("Sum", ascending=False)
top10_releases97['Year'] = '1997'
top10_releases97 = top10_releases97.loc[top10_releases97['20. INDUSTRY SECTOR'] == 'Other']
top10_releases97
Out[22]:
20. INDUSTRY SECTOR Sum Year
12 Other 1390787.0 1997
In [24]:
sum_releases02 = df02.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
top10_releases02 = sum_releases02.sort_values("Sum", ascending=False)
top10_releases02['Year'] = '2002'
top10_releases02 = top10_releases02.loc[top10_releases02['20. INDUSTRY SECTOR'] == 'Other']
top10_releases02
Out[24]:
20. INDUSTRY SECTOR Sum Year
13 Other 1.373054e+07 2002
In [25]:
sum_releases07 = df07.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
top10_releases07 = sum_releases07.sort_values("Sum", ascending=False)
top10_releases07['Year'] = '2007'
top10_releases07 = top10_releases07.loc[top10_releases07['20. INDUSTRY SECTOR'] == 'Other']
top10_releases07
Out[25]:
20. INDUSTRY SECTOR Sum Year
13 Other 1.452810e+07 2007
In [26]:
sum_releases12 = df12.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
top10_releases12 = sum_releases12.sort_values("Sum", ascending=False)
top10_releases12['Year'] = '2012'
top10_releases12 = top10_releases12.loc[top10_releases12['20. INDUSTRY SECTOR'] == 'Other']
top10_releases12
Out[26]:
20. INDUSTRY SECTOR Sum Year
13 Other 8.553324e+06 2012
In [28]:
sum_releases17 = df17.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
top10_releases17 = sum_releases17.sort_values("Sum", ascending=False)
top10_releases17['Year'] = '2017'
top10_releases17 = top10_releases17.loc[top10_releases17['20. INDUSTRY SECTOR'] == 'Other']
top10_releases17
Out[28]:
20. INDUSTRY SECTOR Sum Year
13 Other 1.015744e+07 2017
In [18]:
other22 = top10_releases22.loc[top10_releases22['20. INDUSTRY SECTOR'] == 'Other']
sum_other22 = other22['Sum'].to_numpy().sum()
sum_other22
Out[18]:
8624468.437292
In [19]:
other22
Out[19]:
20. INDUSTRY SECTOR Sum Year
13 Other 8.624468e+06 2022
In [30]:
frames = [other87, top10_releases92, top10_releases97, top10_releases02,
          top10_releases07, top10_releases12, top10_releases17, other22]
other_df = pd.concat(frames)
other_df
Out[30]:
20. INDUSTRY SECTOR Sum Year
13 Other 4.796247e+06 1987
12 Other 2.681550e+05 1992
12 Other 1.390787e+06 1997
13 Other 1.373054e+07 2002
13 Other 1.452810e+07 2007
13 Other 8.553324e+06 2012
13 Other 1.015744e+07 2017
13 Other 8.624468e+06 2022

Petroleum Gathering¶

In [60]:
# pet87 = releases87.loc[releases87['20. INDUSTRY SECTOR'] == 'Petroleum']
# pet87['Year'] = '1987'
# pet87
sum_releases87 = df87.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
pet87 = sum_releases87.sort_values("Sum", ascending=False)
pet87['Year'] = '1987'
pet87 = pet87.loc[pet87['20. INDUSTRY SECTOR'] == 'Petroleum']
pet87
Out[60]:
20. INDUSTRY SECTOR Sum Year
15 Petroleum 2171632.0 1987
In [84]:
sum_releases92 = df92.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
pet92 = sum_releases92.sort_values("Sum", ascending=False)
pet92['Year'] = '1992'
pet92 = pet92.loc[pet92['20. INDUSTRY SECTOR'] == 'Petroleum']
# pet92

sum_releases97 = df97.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
pet97 = sum_releases97.sort_values("Sum", ascending=False)
pet97['Year'] = '1997'
pet97 = pet97.loc[pet97['20. INDUSTRY SECTOR'] == 'Petroleum']
# pet97

sum_releases02 = df02.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
pet02 = sum_releases02.sort_values("Sum", ascending=False)
pet02['Year'] = '2002'
pet02 = pet02.loc[pet02['20. INDUSTRY SECTOR'] == 'Petroleum']
# pet02

sum_releases07 = df07.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
pet07 = sum_releases07.sort_values("Sum", ascending=False)
pet07['Year'] = '2007'
pet07 = pet07.loc[pet07['20. INDUSTRY SECTOR'] == 'Petroleum']
# pet07

sum_releases12 = df12.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
pet12 = sum_releases12.sort_values("Sum", ascending=False)
pet12['Year'] = '2012'
pet12 = pet12.loc[pet12['20. INDUSTRY SECTOR'] == 'Petroleum']
# pet12

sum_releases17 = df17.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
pet17 = sum_releases17.sort_values("Sum", ascending=False)
pet17['Year'] = '2017'
pet17 = pet17.loc[pet17['20. INDUSTRY SECTOR'] == 'Petroleum']
# pet17

sum_releases22 = df22.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
pet22 = sum_releases22.sort_values("Sum", ascending=False)
pet22['Year'] = '2022'
pet22 = pet22.loc[pet22['20. INDUSTRY SECTOR'] == 'Petroleum']
# pet22
In [72]:
pet_frames = [pet87, pet92, pet97, pet02, pet07, pet12, pet17, pet22]
pet_df = pd.concat(pet_frames)
pet_df
Out[72]:
20. INDUSTRY SECTOR Sum Year
15 Petroleum 2.171632e+06 1987
14 Petroleum 4.975890e+05 1992
14 Petroleum 6.920930e+05 1997
15 Petroleum 2.963648e+05 2002
15 Petroleum 2.309861e+05 2007
15 Petroleum 2.963654e+01 2012
15 Petroleum 2.138982e+04 2017
15 Petroleum 1.318013e+06 2022

Printing Gathering¶

In [82]:
sum_releases87 = df87.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
print87 = sum_releases87.sort_values("Sum", ascending=False)
print87['Year'] = '1987'
print87 = print87.loc[print87['20. INDUSTRY SECTOR'] == 'Printing']
# print87

sum_releases92 = df92.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
print92 = sum_releases92.sort_values("Sum", ascending=False)
print92['Year'] = '1992'
print92 = print92.loc[print92['20. INDUSTRY SECTOR'] == 'Printing']
# pet92

sum_releases97 = df97.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
print97 = sum_releases97.sort_values("Sum", ascending=False)
print97['Year'] = '1997'
print97 = print97.loc[print97['20. INDUSTRY SECTOR'] == 'Printing']
# pet97

sum_releases02 = df02.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
print02 = sum_releases02.sort_values("Sum", ascending=False)
print02['Year'] = '2002'
print02 = print02.loc[print02['20. INDUSTRY SECTOR'] == 'Printing']
# pet02

sum_releases07 = df07.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
print07 = sum_releases07.sort_values("Sum", ascending=False)
print07['Year'] = '2007'
print07 = print07.loc[print07['20. INDUSTRY SECTOR'] == 'Printing']
# pet07

sum_releases12 = df12.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
print12 = sum_releases12.sort_values("Sum", ascending=False)
print12['Year'] = '2012'
print12 = print12.loc[print12['20. INDUSTRY SECTOR'] == 'Printing']
# pet12

sum_releases17 = df17.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
print17 = sum_releases17.sort_values("Sum", ascending=False)
print17['Year'] = '2017'
print17 = print17.loc[print17['20. INDUSTRY SECTOR'] == 'Printing']
# pet17

sum_releases22 = df22.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
print22 = sum_releases22.sort_values("Sum", ascending=False)
print22['Year'] = '2022'
print22 = print22.loc[print22['20. INDUSTRY SECTOR'] == 'Printing']
# pet22
In [83]:
print_frames = [print87, print92, print97, print02, print07, print12, print17, print22]
print_df = pd.concat(print_frames)
print_df
Out[83]:
20. INDUSTRY SECTOR Sum Year
19 Printing 7.627668e+06 1987
17 Printing 4.781883e+06 1992
17 Printing 2.719629e+06 1997
19 Printing 3.188420e+06 2002
19 Printing 1.195980e+06 2007
19 Printing 4.680078e+05 2012
19 Printing 9.353110e+04 2017
19 Printing 2.151657e+04 2022

Chemical gathering¶

In [33]:
# Change to chemicals
sum_releases87 = df87.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
chem87 = sum_releases87.sort_values("Sum", ascending=False)
chem87['Year'] = '1987'
chem87 = chem87.loc[chem87['20. INDUSTRY SECTOR'] == 'Chemicals']
# print87

sum_releases92 = df92.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
chem92 = sum_releases92.sort_values("Sum", ascending=False)
chem92['Year'] = '1992'
chem92 = chem92.loc[chem92['20. INDUSTRY SECTOR'] == 'Chemicals']
# pet92

sum_releases97 = df97.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
chem97 = sum_releases97.sort_values("Sum", ascending=False)
chem97['Year'] = '1997'
chem97 = chem97.loc[chem97['20. INDUSTRY SECTOR'] == 'Chemicals']
# pet97

sum_releases02 = df02.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
chem02 = sum_releases02.sort_values("Sum", ascending=False)
chem02['Year'] = '2002'
chem02 = chem02.loc[chem02['20. INDUSTRY SECTOR'] == 'Chemicals']
# pet02

sum_releases07 = df07.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
chem07 = sum_releases07.sort_values("Sum", ascending=False)
chem07['Year'] = '2007'
chem07 = chem07.loc[chem07['20. INDUSTRY SECTOR'] == 'Chemicals']
# pet07

sum_releases12 = df12.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
chem12 = sum_releases12.sort_values("Sum", ascending=False)
chem12['Year'] = '2012'
chem12 = chem12.loc[chem12['20. INDUSTRY SECTOR'] == 'Chemicals']
# pet12

sum_releases17 = df17.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
chem17 = sum_releases17.sort_values("Sum", ascending=False)
chem17['Year'] = '2017'
chem17 = chem17.loc[chem17['20. INDUSTRY SECTOR'] == 'Chemicals']
# pet17

sum_releases22 = df22.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
chem22 = sum_releases22.sort_values("Sum", ascending=False)
chem22['Year'] = '2022'
chem22 = chem22.loc[chem22['20. INDUSTRY SECTOR'] == 'Chemicals']
chem22
Out[33]:
20. INDUSTRY SECTOR Sum Year
2 Chemicals 4.783454e+06 2022
In [34]:
chem_frames = [chem87, chem92, chem97, chem02, chem07, chem12, chem17, chem22]
chem_df = pd.concat(chem_frames)
chem_df
Out[34]:
20. INDUSTRY SECTOR Sum Year
3 Chemicals 2.090067e+08 1987
3 Chemicals 2.246028e+07 1992
3 Chemicals 1.008758e+07 1997
2 Chemicals 9.660913e+06 2002
2 Chemicals 4.780860e+06 2007
2 Chemicals 4.363623e+06 2012
2 Chemicals 4.237659e+06 2017
2 Chemicals 4.783454e+06 2022

Paper gathering¶

In [35]:
# Change to paper
sum_releases87 = df87.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
paper87 = sum_releases87.sort_values("Sum", ascending=False)
paper87['Year'] = '1987'
paper87 = paper87.loc[paper87['20. INDUSTRY SECTOR'] == 'Paper']
# print87

sum_releases92 = df92.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
paper92 = sum_releases92.sort_values("Sum", ascending=False)
paper92['Year'] = '1992'
paper92 = paper92.loc[paper92['20. INDUSTRY SECTOR'] == 'Paper']
# pet92

sum_releases97 = df97.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
paper97 = sum_releases97.sort_values("Sum", ascending=False)
paper97['Year'] = '1997'
paper97 = paper97.loc[paper97['20. INDUSTRY SECTOR'] == 'Paper']
# pet97

sum_releases02 = df02.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
paper02 = sum_releases02.sort_values("Sum", ascending=False)
paper02['Year'] = '2002'
paper02 = paper02.loc[paper02['20. INDUSTRY SECTOR'] == 'Paper']
# pet02

sum_releases07 = df07.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
paper07 = sum_releases07.sort_values("Sum", ascending=False)
paper07['Year'] = '2007'
paper07 = paper07.loc[paper07['20. INDUSTRY SECTOR'] == 'Paper']
# pet07

sum_releases12 = df12.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
paper12 = sum_releases12.sort_values("Sum", ascending=False)
paper12['Year'] = '2012'
paper12 = paper12.loc[paper12['20. INDUSTRY SECTOR'] == 'Paper']
# pet12

sum_releases17 = df17.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
paper17 = sum_releases17.sort_values("Sum", ascending=False)
paper17['Year'] = '2017'
paper17 = paper17.loc[paper17['20. INDUSTRY SECTOR'] == 'Paper']
# pet17

sum_releases22 = df22.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
paper22 = sum_releases22.sort_values("Sum", ascending=False)
paper22['Year'] = '2022'
paper22 = paper22.loc[paper22['20. INDUSTRY SECTOR'] == 'Paper']
# pet22
In [36]:
paper_frames = [paper87, paper92, paper97, paper02, paper07, paper12, paper17, paper22]
paper_df = pd.concat(paper_frames)
paper_df
Out[36]:
20. INDUSTRY SECTOR Sum Year
14 Paper 1.235728e+08 1987
13 Paper 1.304694e+07 1992
13 Paper 1.820348e+07 1997
14 Paper 1.496354e+07 2002
14 Paper 1.136443e+07 2007
14 Paper 6.497866e+06 2012
14 Paper 7.014685e+06 2017
14 Paper 6.534435e+06 2022

Combining the dataframes for line graphs¶

In [37]:
combined_frames = [chem_df, paper_df]
combined_df = pd.concat(combined_frames)
combined_df
Out[37]:
20. INDUSTRY SECTOR Sum Year
3 Chemicals 2.090067e+08 1987
3 Chemicals 2.246028e+07 1992
3 Chemicals 1.008758e+07 1997
2 Chemicals 9.660913e+06 2002
2 Chemicals 4.780860e+06 2007
2 Chemicals 4.363623e+06 2012
2 Chemicals 4.237659e+06 2017
2 Chemicals 4.783454e+06 2022
14 Paper 1.235728e+08 1987
13 Paper 1.304694e+07 1992
13 Paper 1.820348e+07 1997
14 Paper 1.496354e+07 2002
14 Paper 1.136443e+07 2007
14 Paper 6.497866e+06 2012
14 Paper 7.014685e+06 2017
14 Paper 6.534435e+06 2022
In [38]:
fig = px.line(other_df, y="Sum", x="Year", 
              title='Sum of Releases Over Year by "Other" Sector', markers=True, text="Sum")
fig.update_layout(margin=dict(t=50, b=0, l=0, r=50))
fig.update_layout(legend_title="Year", title_x=0.3)
fig.update_layout(autosize=False, width=900, height=400)
fig.update_layout(title_y=0.98, title_x=0.5)
fig.update_traces(textposition='top center')
fig.show()
#log_x=True
In [92]:
fig = px.line(combined_df, y="Sum", x="Year", 
              title='Sum of Releases Over Year by Chemical and Paper Sectors', color = '20. INDUSTRY SECTOR', log_x = True, markers=True)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=50))
fig.update_layout(legend_title="Year", title_x=0.3)
fig.update_layout(autosize=False, width=900, height=400)
fig.update_layout(title_y=0.98, title_x=0.5)
fig.update_traces(textposition='top center')
fig.update_layout(
    title_font=dict(size=20),
    xaxis_title_font=dict(size=18),
    yaxis_title_font=dict(size=18),
    yaxis = dict(tickfont = dict(size=16)),
    xaxis = dict(tickfont = dict(size=16))
)
fig.show()
#log_x=True
#text = "Sum"
In [40]:
# Look into regulations that were passed
# Graph all changes and see where there are drastic changes
# Examine businesses?
# Look at paper and chemical sectors
In [41]:
# Writeup: Format like a scientific journal
# Intro, motivation, section to show figures, deeper dive into certain categories, references

Air gathering¶

In [11]:
# sec_releases87 = df87.groupby('20. INDUSTRY SECTOR')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
# air87 = sec_releases87.sort_values("Sum", ascending=False)
# air87['Year'] = '1987'
# air87 = air87.loc[air87['20. INDUSTRY SECTOR'] == 'Paper']
# print out air sector, sum of air sector numbers, year

# air_releases87 = df87.groupby('Combined Air')['104. TOTAL RELEASES'].sum().reset_index(name='Sum')
air_releases87 = df87['Combined Air']
sum_air87 = air_releases87.sum()
sum_air87_df = pd.DataFrame([[sum_air87]],columns=['Sum'])
sum_air87_df['Year'] = '1987'
sum_air87_df['Sector'] = 'Air'
sum_air87_df
Out[11]:
Sum Year Sector
0 145172673.0 1987 Air
In [12]:
air_releases92 = df92['Combined Air']
sum_air92 = air_releases92.sum()
sum_air92_df = pd.DataFrame([[sum_air92]],columns=['Sum'])
sum_air92_df['Year'] = '1992'
sum_air92_df['Sector'] = 'Air'
# sum_air92_df

air_releases97 = df97['Combined Air']
sum_air97 = air_releases97.sum()
sum_air97_df = pd.DataFrame([[sum_air97]],columns=['Sum'])
sum_air97_df['Year'] = '1997'
sum_air97_df['Sector'] = 'Air'
# sum_air97_df

air_releases02 = df02['Combined Air']
sum_air02 = air_releases02.sum()
sum_air02_df = pd.DataFrame([[sum_air02]],columns=['Sum'])
sum_air02_df['Year'] = '2002'
sum_air02_df['Sector'] = 'Air'
# sum_air02_df

air_releases07 = df07['Combined Air']
sum_air07 = air_releases07.sum()
sum_air07_df = pd.DataFrame([[sum_air07]],columns=['Sum'])
sum_air07_df['Year'] = '2007'
sum_air07_df['Sector'] = 'Air'
# sum_air02_df

air_releases12 = df12['Combined Air']
sum_air12 = air_releases12.sum()
sum_air12_df = pd.DataFrame([[sum_air12]],columns=['Sum'])
sum_air12_df['Year'] = '2012'
sum_air12_df['Sector'] = 'Air'
# sum_air12_df

air_releases17 = df17['Combined Air']
sum_air17 = air_releases17.sum()
sum_air17_df = pd.DataFrame([[sum_air17]],columns=['Sum'])
sum_air17_df['Year'] = '2017'
sum_air17_df['Sector'] = 'Air'
# sum_air17_df

air_releases22 = df22['Combined Air']
sum_air22 = air_releases22.sum()
sum_air22_df = pd.DataFrame([[sum_air22]],columns=['Sum'])
sum_air22_df['Year'] = '2022'
sum_air22_df['Sector'] = 'Air'
# sum_air22_df
In [13]:
air_frames = [sum_air87_df, sum_air92_df, sum_air97_df, sum_air02_df, 
              sum_air07_df, sum_air12_df, sum_air17_df, sum_air22_df]
air_df = pd.concat(air_frames)
air_df
Out[13]:
Sum Year Sector
0 1.451727e+08 1987 Air
0 6.582979e+07 1992 Air
0 4.854111e+07 1997 Air
0 5.696188e+07 2002 Air
0 3.950086e+07 2007 Air
0 2.085558e+07 2012 Air
0 1.641368e+07 2017 Air
0 1.401291e+07 2022 Air

Gathering non-air¶

In [14]:
nonair_releases87 = df87['Non-Air']
sum_nonair87 = nonair_releases87.sum()
sum_nonair87_df = pd.DataFrame([[sum_nonair87]],columns=['Sum'])
sum_nonair87_df['Year'] = '1987'
sum_nonair87_df['Sector'] = 'Non-Air'

nonair_releases92 = df92['Non-Air']
sum_nonair92 = nonair_releases92.sum()
sum_nonair92_df = pd.DataFrame([[sum_nonair92]],columns=['Sum'])
sum_nonair92_df['Year'] = '1992'
sum_nonair92_df['Sector'] = 'Non-Air'
# sum_air92_df

nonair_releases97 = df97['Non-Air']
sum_nonair97 = nonair_releases97.sum()
sum_nonair97_df = pd.DataFrame([[sum_air97]],columns=['Sum'])
sum_nonair97_df['Year'] = '1997'
sum_nonair97_df['Sector'] = 'Non-Air'
# sum_air97_df

nonair_releases02 = df02['Non-Air']
sum_nonair02 = nonair_releases02.sum()
sum_nonair02_df = pd.DataFrame([[sum_nonair02]],columns=['Sum'])
sum_nonair02_df['Year'] = '2002'
sum_nonair02_df['Sector'] = 'Non-Air'
# sum_air02_df

nonair_releases07 = df07['Non-Air']
sum_nonair07 = nonair_releases07.sum()
sum_nonair07_df = pd.DataFrame([[sum_nonair07]],columns=['Sum'])
sum_nonair07_df['Year'] = '2007'
sum_nonair07_df['Sector'] = 'Non-Air'
# sum_air02_df

nonair_releases12 = df12['Non-Air']
sum_nonair12 = nonair_releases12.sum()
sum_nonair12_df = pd.DataFrame([[sum_nonair12]],columns=['Sum'])
sum_nonair12_df['Year'] = '2012'
sum_nonair12_df['Sector'] = 'Non-Air'
# sum_air12_df

nonair_releases17 = df17['Non-Air']
sum_nonair17 = nonair_releases17.sum()
sum_nonair17_df = pd.DataFrame([[sum_nonair17]],columns=['Sum'])
sum_nonair17_df['Year'] = '2017'
sum_nonair17_df['Sector'] = 'Non-Air'
# sum_air17_df

nonair_releases22 = df22['Non-Air']
sum_nonair22 = nonair_releases22.sum()
sum_nonair22_df = pd.DataFrame([[sum_nonair22]],columns=['Sum'])
sum_nonair22_df['Year'] = '2022'
sum_nonair22_df['Sector'] = 'Non-Air'
# sum_nonair22_df
In [15]:
nonair_frames = [sum_nonair87_df, sum_nonair92_df, sum_nonair97_df, sum_nonair02_df, 
              sum_nonair07_df, sum_nonair12_df, sum_nonair17_df, sum_nonair22_df]
nonair_df = pd.concat(nonair_frames)
nonair_df
Out[15]:
Sum Year Sector
0 2.492143e+08 1987 Non-Air
0 6.347974e+06 1992 Non-Air
0 4.854111e+07 1997 Non-Air
0 3.355421e+07 2002 Non-Air
0 3.256734e+07 2007 Non-Air
0 2.199582e+07 2012 Non-Air
0 2.027180e+07 2017 Non-Air
0 1.635928e+07 2022 Non-Air

Combining air and non-air dataframes¶

In [16]:
combined_air_frames = [air_df, nonair_df]
combined_air_df = pd.concat(combined_air_frames)
combined_air_df
Out[16]:
Sum Year Sector
0 1.451727e+08 1987 Air
0 6.582979e+07 1992 Air
0 4.854111e+07 1997 Air
0 5.696188e+07 2002 Air
0 3.950086e+07 2007 Air
0 2.085558e+07 2012 Air
0 1.641368e+07 2017 Air
0 1.401291e+07 2022 Air
0 2.492143e+08 1987 Non-Air
0 6.347974e+06 1992 Non-Air
0 4.854111e+07 1997 Non-Air
0 3.355421e+07 2002 Non-Air
0 3.256734e+07 2007 Non-Air
0 2.199582e+07 2012 Non-Air
0 2.027180e+07 2017 Non-Air
0 1.635928e+07 2022 Non-Air
In [17]:
fig = px.line(combined_air_df, y="Sum", x="Year", 
              title='Sum of Releases, Air vs. Non-Air', color = 'Sector', markers=True)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=50))
fig.update_layout(legend_title="Year", title_x=0.3)
fig.update_layout(autosize=False, width=900, height=400)
fig.update_layout(title_y=0.95, title_x=0.5)
fig.update_traces(textposition='top center')
fig.show()

Gathering water¶

In [18]:
water_releases87 = df87['Water']
sum_water87 = water_releases87.sum()
sum_water87_df = pd.DataFrame([[sum_water87]],columns=['Sum'])
sum_water87_df['Year'] = '1987'
sum_water87_df['Sector'] = 'Water'

water_releases92 = df92['Water']
sum_water92 = water_releases92.sum()
sum_water92_df = pd.DataFrame([[sum_water92]],columns=['Sum'])
sum_water92_df['Year'] = '1992'
sum_water92_df['Sector'] = 'Water'
# sum_air92_df

water_releases97 = df97['Water']
sum_water97 = water_releases97.sum()
sum_water97_df = pd.DataFrame([[sum_water97]],columns=['Sum'])
sum_water97_df['Year'] = '1997'
sum_water97_df['Sector'] = 'Water'
# sum_air97_df

water_releases02 = df02['Water']
sum_water02 = water_releases02.sum()
sum_water02_df = pd.DataFrame([[sum_water02]],columns=['Sum'])
sum_water02_df['Year'] = '2002'
sum_water02_df['Sector'] = 'Water'
# sum_air02_df

water_releases07 = df07['Water']
sum_water07 = water_releases07.sum()
sum_water07_df = pd.DataFrame([[sum_water07]],columns=['Sum'])
sum_water07_df['Year'] = '2007'
sum_water07_df['Sector'] = 'Water'
# sum_air02_df

water_releases12 = df12['Water']
sum_water12 = water_releases12.sum()
sum_water12_df = pd.DataFrame([[sum_water12]],columns=['Sum'])
sum_water12_df['Year'] = '2012'
sum_water12_df['Sector'] = 'Water'
# sum_air12_df

water_releases17 = df17['Water']
sum_water17 = water_releases17.sum()
sum_water17_df = pd.DataFrame([[sum_water17]],columns=['Sum'])
sum_water17_df['Year'] = '2017'
sum_water17_df['Sector'] = 'Water'
# sum_air17_df

water_releases22 = df22['Water']
sum_water22 = water_releases22.sum()
sum_water22_df = pd.DataFrame([[sum_water22]],columns=['Sum'])
sum_water22_df['Year'] = '2022'
sum_water22_df['Sector'] = 'Water'
# sum_water22_df
In [19]:
water_frames = [sum_water87_df, sum_water92_df, sum_water97_df, sum_water02_df, 
              sum_water07_df, sum_water12_df, sum_water17_df, sum_water22_df]
water_df = pd.concat(water_frames)
water_df
Out[19]:
Sum Year Sector
0 2.255985e+08 1987 Water
0 1.902534e+06 1992 Water
0 6.958372e+06 1997 Water
0 1.801902e+07 2002 Water
0 1.938498e+07 2007 Water
0 1.188364e+07 2012 Water
0 1.129843e+07 2017 Water
0 1.013558e+07 2022 Water

Gathering non-water¶

In [20]:
nonwater_releases87 = df87['Non-Water']
sum_nonwater87 = nonwater_releases87.sum()
sum_nonwater87_df = pd.DataFrame([[sum_nonwater87]],columns=['Sum'])
sum_nonwater87_df['Year'] = '1987'
sum_nonwater87_df['Sector'] = 'Non-Water'

nonwater_releases92 = df92['Non-Water']
sum_nonwater92 = nonwater_releases92.sum()
sum_nonwater92_df = pd.DataFrame([[sum_nonwater92]],columns=['Sum'])
sum_nonwater92_df['Year'] = '1992'
sum_nonwater92_df['Sector'] = 'Non-Water'
# sum_air92_df

nonwater_releases97 = df97['Non-Water']
sum_nonwater97 = nonwater_releases97.sum()
sum_nonwater97_df = pd.DataFrame([[sum_nonwater97]],columns=['Sum'])
sum_nonwater97_df['Year'] = '1997'
sum_nonwater97_df['Sector'] = 'Non-Water'
# sum_air97_df

nonwater_releases02 = df02['Non-Water']
sum_nonwater02 = nonwater_releases02.sum()
sum_nonwater02_df = pd.DataFrame([[sum_nonwater02]],columns=['Sum'])
sum_nonwater02_df['Year'] = '2002'
sum_nonwater02_df['Sector'] = 'Non-Water'
# sum_air02_df

nonwater_releases07 = df07['Non-Water']
sum_nonwater07 = nonwater_releases07.sum()
sum_nonwater07_df = pd.DataFrame([[sum_nonwater07]],columns=['Sum'])
sum_nonwater07_df['Year'] = '2007'
sum_nonwater07_df['Sector'] = 'Non-Water'
# sum_air02_df

nonwater_releases12 = df12['Non-Water']
sum_nonwater12 = nonwater_releases12.sum()
sum_nonwater12_df = pd.DataFrame([[sum_nonwater12]],columns=['Sum'])
sum_nonwater12_df['Year'] = '2012'
sum_nonwater12_df['Sector'] = 'Non-Water'
# sum_air12_df

nonwater_releases17 = df17['Non-Water']
sum_nonwater17 = nonwater_releases17.sum()
sum_nonwater17_df = pd.DataFrame([[sum_nonwater17]],columns=['Sum'])
sum_nonwater17_df['Year'] = '2017'
sum_nonwater17_df['Sector'] = 'Non-Water'
# sum_air17_df

nonwater_releases22 = df22['Non-Water']
sum_nonwater22 = nonwater_releases22.sum()
sum_nonwater22_df = pd.DataFrame([[sum_nonwater22]],columns=['Sum'])
sum_nonwater22_df['Year'] = '2022'
sum_nonwater22_df['Sector'] = 'Non-Water'
# sum_nonwater22_df
In [21]:
nonwater_frames = [sum_nonwater87_df, sum_nonwater92_df, sum_nonwater97_df, sum_nonwater02_df, 
              sum_nonwater07_df, sum_nonwater12_df, sum_nonwater17_df, sum_nonwater22_df]
nonwater_df = pd.concat(nonwater_frames)
nonwater_df
Out[21]:
Sum Year Sector
0 1.687885e+08 1987 Non-Water
0 7.027523e+07 1992 Non-Water
0 5.653254e+07 1997 Non-Water
0 7.249707e+07 2002 Non-Water
0 5.268322e+07 2007 Non-Water
0 3.096776e+07 2012 Non-Water
0 2.538705e+07 2017 Non-Water
0 2.023661e+07 2022 Non-Water
In [22]:
combined_water_frames = [water_df, nonwater_df]
combined_water_df = pd.concat(combined_water_frames)
combined_water_df
Out[22]:
Sum Year Sector
0 2.255985e+08 1987 Water
0 1.902534e+06 1992 Water
0 6.958372e+06 1997 Water
0 1.801902e+07 2002 Water
0 1.938498e+07 2007 Water
0 1.188364e+07 2012 Water
0 1.129843e+07 2017 Water
0 1.013558e+07 2022 Water
0 1.687885e+08 1987 Non-Water
0 7.027523e+07 1992 Non-Water
0 5.653254e+07 1997 Non-Water
0 7.249707e+07 2002 Non-Water
0 5.268322e+07 2007 Non-Water
0 3.096776e+07 2012 Non-Water
0 2.538705e+07 2017 Non-Water
0 2.023661e+07 2022 Non-Water
In [23]:
fig = px.line(combined_water_df, y="Sum", x="Year", 
              title='Sum of Releases, Water vs. Non-Water', color = 'Sector', markers=True)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=50))
fig.update_layout(legend_title="Year", title_x=0.3)
fig.update_layout(autosize=False, width=900, height=400)
fig.update_layout(title_y=0.95, title_x=0.5)
fig.update_traces(textposition='top center')
fig.show()

Gathering other¶

In [29]:
other_releases87 = df87['Other']
sum_other87 = other_releases87.sum()
sum_other87_df = pd.DataFrame([[sum_other87]],columns=['Sum'])
sum_other87_df['Year'] = '1987'
sum_other87_df['Sector'] = 'Other Releases'

other_releases92 = df92['Other']
sum_other92 = other_releases92.sum()
sum_other92_df = pd.DataFrame([[sum_other92]],columns=['Sum'])
sum_other92_df['Year'] = '1992'
sum_other92_df['Sector'] = 'Other Releases'

other_releases97 = df97['Other']
sum_other97 = other_releases97.sum()
sum_other97_df = pd.DataFrame([[sum_other97]],columns=['Sum'])
sum_other97_df['Year'] = '1997'
sum_other97_df['Sector'] = 'Other Releases'

other_releases02 = df02['Other']
sum_other02 = other_releases02.sum()
sum_other02_df = pd.DataFrame([[sum_other02]],columns=['Sum'])
sum_other02_df['Year'] = '2002'
sum_other02_df['Sector'] = 'Other Releases'

other_releases07 = df07['Other']
sum_other07 = other_releases07.sum()
sum_other07_df = pd.DataFrame([[sum_other07]],columns=['Sum'])
sum_other07_df['Year'] = '2007'
sum_other07_df['Sector'] = 'Other Releases'

other_releases12 = df87['Other']
sum_other12 = other_releases12.sum()
sum_other12_df = pd.DataFrame([[sum_other12]],columns=['Sum'])
sum_other12_df['Year'] = '2012'
sum_other12_df['Sector'] = 'Other Releases'

other_releases17 = df17['Other']
sum_other17 = other_releases17.sum()
sum_other17_df = pd.DataFrame([[sum_other17]],columns=['Sum'])
sum_other17_df['Year'] = '2017'
sum_other17_df['Sector'] = 'Other Releases'

other_releases22 = df22['Other']
sum_other22 = other_releases22.sum()
sum_other22_df = pd.DataFrame([[sum_other22]],columns=['Sum'])
sum_other22_df['Year'] = '2022'
sum_other22_df['Sector'] = 'Other Releases'
In [30]:
other_frames = [sum_other87_df, sum_other92_df, sum_other97_df, sum_other02_df, 
              sum_other07_df, sum_other12_df, sum_other17_df, sum_other22_df]
other_df = pd.concat(other_frames)
other_df
Out[30]:
Sum Year Sector
0 2.361584e+07 1987 Other Releases
0 4.445440e+06 1992 Other Releases
0 7.991427e+06 1997 Other Releases
0 1.553518e+07 2002 Other Releases
0 1.318236e+07 2007 Other Releases
0 2.361584e+07 2012 Other Releases
0 8.973366e+06 2017 Other Releases
0 6.223706e+06 2022 Other Releases
In [61]:
comp_frames = [air_df, water_df, other_df]
comp_df = pd.concat(comp_frames)
comp_df
Out[61]:
Sum Year Sector
0 1.451727e+08 1987 Air
0 6.582979e+07 1992 Air
0 4.854111e+07 1997 Air
0 5.696188e+07 2002 Air
0 3.950086e+07 2007 Air
0 2.085558e+07 2012 Air
0 1.641368e+07 2017 Air
0 1.401291e+07 2022 Air
0 2.255985e+08 1987 Water
0 1.902534e+06 1992 Water
0 6.958372e+06 1997 Water
0 1.801902e+07 2002 Water
0 1.938498e+07 2007 Water
0 1.188364e+07 2012 Water
0 1.129843e+07 2017 Water
0 1.013558e+07 2022 Water
0 2.361584e+07 1987 Other Releases
0 4.445440e+06 1992 Other Releases
0 7.991427e+06 1997 Other Releases
0 1.553518e+07 2002 Other Releases
0 1.318236e+07 2007 Other Releases
0 2.361584e+07 2012 Other Releases
0 8.973366e+06 2017 Other Releases
0 6.223706e+06 2022 Other Releases
In [93]:
fig = px.line(comp_df, y="Sum", x="Year", 
              title='Sum of Releases, Air vs. Water vs. Other Releases', color = 'Sector', markers=True)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=50))
fig.update_layout(legend_title="Year", title_x=0.3)
fig.update_layout(autosize=False, width=900, height=400)
fig.update_layout(title_y=0.95, title_x=0.5)
fig.update_layout(width=1000)
fig.update_layout(
    title_font=dict(size=20),
    xaxis_title_font=dict(size=18),
    yaxis_title_font=dict(size=18),
    yaxis = dict(tickfont = dict(size=16)),
    xaxis = dict(tickfont = dict(size=16))
)
fig.show()
In [ ]: